Data source: https://archive.ics.uci.edu/dataset/602/dry+bean+dataset
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff
# Plot styling: apply seaborn's theme using the "pastel" colour palette.
sns.set_theme(palette="pastel")
# Widen pandas' display limits so wide frames are shown in full:
# up to 500 columns, and up to 1000 characters per output line.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
C:\Users\USER\AppData\Roaming\Python\Python311\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
# Read the Dry Bean dataset from the local Excel workbook into a DataFrame.
# The path is relative to the working directory.
data = pd.read_excel('data/Dry_Bean_Dataset.xlsx')
# Bare expression: when run as a notebook cell this displays the DataFrame.
data
| | Area | Perimeter | MajorAxisLength | MinorAxisLength | AspectRation | Eccentricity | ConvexArea | EquivDiameter | Extent | Solidity | roundness | Compactness | ShapeFactor1 | ShapeFactor2 | ShapeFactor3 | ShapeFactor4 | Class |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 28395 | 610.291 | 208.178117 | 173.888747 | 1.197191 | 0.549812 | 28715 | 190.141097 | 0.763923 | 0.988856 | 0.958027 | 0.913358 | 0.007332 | 0.003147 | 0.834222 | 0.998724 | SEKER |
| 1 | 28734 | 638.018 | 200.524796 | 182.734419 | 1.097356 | 0.411785 | 29172 | 191.272750 | 0.783968 | 0.984986 | 0.887034 | 0.953861 | 0.006979 | 0.003564 | 0.909851 | 0.998430 | SEKER |
| 2 | 29380 | 624.110 | 212.826130 | 175.931143 | 1.209713 | 0.562727 | 29690 | 193.410904 | 0.778113 | 0.989559 | 0.947849 | 0.908774 | 0.007244 | 0.003048 | 0.825871 | 0.999066 | SEKER |
| 3 | 30008 | 645.884 | 210.557999 | 182.516516 | 1.153638 | 0.498616 | 30724 | 195.467062 | 0.782681 | 0.976696 | 0.903936 | 0.928329 | 0.007017 | 0.003215 | 0.861794 | 0.994199 | SEKER |
| 4 | 30140 | 620.134 | 201.847882 | 190.279279 | 1.060798 | 0.333680 | 30417 | 195.896503 | 0.773098 | 0.990893 | 0.984877 | 0.970516 | 0.006697 | 0.003665 | 0.941900 | 0.999166 | SEKER |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 13606 | 42097 | 759.696 | 288.721612 | 185.944705 | 1.552728 | 0.765002 | 42508 | 231.515799 | 0.714574 | 0.990331 | 0.916603 | 0.801865 | 0.006858 | 0.001749 | 0.642988 | 0.998385 | DERMASON |
| 13607 | 42101 | 757.499 | 281.576392 | 190.713136 | 1.476439 | 0.735702 | 42494 | 231.526798 | 0.799943 | 0.990752 | 0.922015 | 0.822252 | 0.006688 | 0.001886 | 0.676099 | 0.998219 | DERMASON |
| 13608 | 42139 | 759.321 | 281.539928 | 191.187979 | 1.472582 | 0.734065 | 42569 | 231.631261 | 0.729932 | 0.989899 | 0.918424 | 0.822730 | 0.006681 | 0.001888 | 0.676884 | 0.996767 | DERMASON |
| 13609 | 42147 | 763.779 | 283.382636 | 190.275731 | 1.489326 | 0.741055 | 42667 | 231.653248 | 0.705389 | 0.987813 | 0.907906 | 0.817457 | 0.006724 | 0.001852 | 0.668237 | 0.995222 | DERMASON |
| 13610 | 42159 | 772.237 | 295.142741 | 182.204716 | 1.619841 | 0.786693 | 42600 | 231.686223 | 0.788962 | 0.989648 | 0.888380 | 0.784997 | 0.007001 | 0.001640 | 0.616221 | 0.998180 | DERMASON |
13611 rows × 17 columns
# Print a summary of the frame: column names, non-null counts, dtypes and
# approximate memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 13611 entries, 0 to 13610 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Area 13611 non-null int64 1 Perimeter 13611 non-null float64 2 MajorAxisLength 13611 non-null float64 3 MinorAxisLength 13611 non-null float64 4 AspectRation 13611 non-null float64 5 Eccentricity 13611 non-null float64 6 ConvexArea 13611 non-null int64 7 EquivDiameter 13611 non-null float64 8 Extent 13611 non-null float64 9 Solidity 13611 non-null float64 10 roundness 13611 non-null float64 11 Compactness 13611 non-null float64 12 ShapeFactor1 13611 non-null float64 13 ShapeFactor2 13611 non-null float64 14 ShapeFactor3 13611 non-null float64 15 ShapeFactor4 13611 non-null float64 16 Class 13611 non-null object dtypes: float64(14), int64(2), object(1) memory usage: 1.8+ MB
# Count missing values per column (isna() is the canonical name; isnull() is
# its alias and yields the same result).
data.isna().sum()
Area 0 Perimeter 0 MajorAxisLength 0 MinorAxisLength 0 AspectRation 0 Eccentricity 0 ConvexArea 0 EquivDiameter 0 Extent 0 Solidity 0 roundness 0 Compactness 0 ShapeFactor1 0 ShapeFactor2 0 ShapeFactor3 0 ShapeFactor4 0 Class 0 dtype: int64
# Pairwise relationships between the features, coloured by bean class.
#
# pairplot() already applies the grid's own tight layout before returning,
# which keeps a margin free for the legend drawn outside the axes. A follow-up
# plt.tight_layout() is therefore unnecessary: it re-flows the axes across the
# whole figure without that reserved margin and raises a second
# "figure layout has changed to tight" warning.
#
# The grid is bound to a name so a notebook cell does not echo its repr, and
# so the figure can be adjusted later (e.g. pair_grid.tight_layout()).
pair_grid = sns.pairplot(data, hue='Class')
c:\Users\USER\anaconda3\Lib\site-packages\seaborn\axisgrid.py:123: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs) C:\Users\USER\AppData\Local\Temp\ipykernel_65624\2456656178.py:2: UserWarning: The figure layout has changed to tight plt.tight_layout()